In this script we conduct the estimation for the measure_arguments approach. It is to verify and detect arguments cost for opcodes execution.

Below are the parameters used for this report.

EVM client ENV=geth.

The file with programs PROGRAMS=pg_arguments_arithmetic.csv.

The file with measurement results RESULTS=results_arguments_arithmetic_geth.csv.

The optional comma separated list of estimated costs obtained in the marginal procedure MARGINAL_ESTIMATED_COST-.

The optional file to output estimated costs and results OUTPUT_ESTIMATED_COST-.

if ( !removed_outliers) {
  boxplot(measure_total_time_ns ~ opcode, data=measurements, las=2, outline=TRUE, log='y', main=paste(env, 'all'))
}
if (removed_outliers) {
  par(mfrow=c(2, 1))
  
  # before
  boxplot(measure_total_time_ns ~ opcode, data=measurements, las=2, outline=TRUE, log='y', main=paste(env, 'all'))

  measurements = remove_outliers(measurements, 'measure_total_time_ns')
  
  # after
  boxplot(measure_total_time_ns ~ opcode, data=measurements, las=2, outline=TRUE, log='y', main=paste(env, 'no_outliers'))
}

Detailed view

This is massive and detailed overview on the impact of arguments. Because of the number of charts, only op count = 30 is eligible. Feel free to change it, but that should not be anyhow more informative. The visualizations do not guarantee that all dependencies are clearly seen. Especially for binary and ternary opcodes where impacts of arg0, arg1 and arg2 are mixed. But if a dependency is graphically noticeable that you should expect also statistical dependency.

for(i in 1:nrow(opcode_arity_df)) {
  opcode_arity <- opcode_arity_df[i,]
  opcode <- opcode_arity[["opcode"]]
  # skip opcodes without measurements
  if(length(which(measurements$opcode == opcode & measurements$op_count == 30)) == 0) {
    next
  }
  arity <- opcode_arity[["arity"]]
  if (arity >= 1) {
    plot(measure_total_time_ns ~ arg0, data=measurements[which(measurements$opcode == opcode & measurements$op_count == 30), ], pch=5, col='blue')
    title(main = paste(env, opcode, 'arg0', 'opcount 30'))
  }
  if (arity >= 2) {
    plot(measure_total_time_ns ~ arg1, data=measurements[which(measurements$opcode == opcode & measurements$op_count == 30), ], pch=5, col='blue')
    title(main = paste(env, opcode, 'arg1', 'opcount 30'))
  }
  if (arity >= 3) {
    plot(measure_total_time_ns ~ arg2, data=measurements[which(measurements$opcode == opcode & measurements$op_count == 30), ], pch=5, col='blue')
    title(main = paste(env, opcode, 'arg2', 'opcount 30'))
  }
}

Models

This is the so-called “first-pass” at the estimation procedure, where we estimated all possible argument impact variables for all OPCODEs. We gather all the results in the first_pass table, inspect this to see where the arguments turned out to be significantly impacting the computation cost.

The argument is considered impacting on the gas cost of opcode execution if its coefficient in the regression model

  1. is noticeable comparing to the marginal cost (at least 0.01 %),
  2. has high correlation (Pr(>|t|)=Pr(>|Estimate/Std. Error|)<0.001).

Moreover, these opcodes are considered having impacting arguments by default as they are explicitly included in the current gas cost calculation.

Below is the listing of arguments impact.

if (details) {
  args_estimates
} else {
  args_estimates[which(args_estimates$has_impacting), ]
}
##        opcode arg      ns_raw          ns_p has_impacting
## 1         ADD   0  2.29123494  8.777741e-03         FALSE
## 2         ADD   1  0.90691340  2.874219e-01         FALSE
## 3      ADDMOD   0  1.10412753  2.250864e-01         FALSE
## 4      ADDMOD   1  1.30702925  1.685927e-01         FALSE
## 5      ADDMOD   2 -0.59812483  5.835785e-01         FALSE
## 6         AND   0 -0.77395381  3.495778e-01         FALSE
## 7         AND   1 -0.64204584  4.753191e-01         FALSE
## 8        BYTE   0 -0.39576077  6.595761e-01         FALSE
## 9        BYTE   1  0.59245938  5.017880e-01         FALSE
## 10        DIV   0  1.14360519  3.516143e-01         FALSE
## 11        DIV   1 -0.31547091  7.874282e-01         FALSE
## 12         EQ   0 -2.15854080  1.164984e-02         FALSE
## 13         EQ   1 -0.46463851  6.041499e-01         FALSE
## 14        EXP   0  0.33560857  5.821580e-01         FALSE
## 15        EXP   1 30.84189760 5.897099e-225          TRUE
## 16         GT   0 -0.51009221  5.789402e-01         FALSE
## 17         GT   1  0.40077936  6.523409e-01         FALSE
## 18     ISZERO   0 -1.41542616  1.140299e-01         FALSE
## 19  KECCAK256   0  0.01141304  3.231326e-01         FALSE
## 20  KECCAK256   1  1.58341819  0.000000e+00          TRUE
## 21         LT   0 -0.47833838  5.896978e-01         FALSE
## 22         LT   1  0.56664144  5.345833e-01         FALSE
## 23        MOD   0  0.20448844  8.677863e-01         FALSE
## 24        MOD   1  0.88967145  4.666156e-01         FALSE
## 25        MUL   0  0.05054622  9.542560e-01         FALSE
## 26        MUL   1  0.81113273  3.585138e-01         FALSE
## 27     MULMOD   0 -0.02743146  9.751914e-01         FALSE
## 28     MULMOD   1 -0.07435254  9.326493e-01         FALSE
## 29     MULMOD   2  0.25813993  7.818149e-01         FALSE
## 30        NOT   0  0.25668277  7.679307e-01         FALSE
## 31         OR   0  0.53545283  5.335712e-01         FALSE
## 32         OR   1 -1.87314092  2.944656e-02         FALSE
## 33        SAR   0  1.35188625  1.330102e-01         FALSE
## 34        SAR   1  0.69118091  4.364115e-01         FALSE
## 35       SDIV   0  0.22359576  8.606565e-01         FALSE
## 36       SDIV   1  0.98912846  4.179878e-01         FALSE
## 37        SGT   0  0.57945060  5.355509e-01         FALSE
## 38        SGT   1  0.84826571  3.162259e-01         FALSE
## 39        SHL   0 -0.29011344  7.300892e-01         FALSE
## 40        SHL   1  0.17484256  8.414497e-01         FALSE
## 41        SHR   0  0.91181945  2.916879e-01         FALSE
## 42        SHR   1 -1.91350689  2.454875e-02         FALSE
## 43 SIGNEXTEND   0  0.33157733  6.932240e-01         FALSE
## 44 SIGNEXTEND   1 -0.70005733  4.195110e-01         FALSE
## 45        SLT   0  0.17198676  8.453711e-01         FALSE
## 46        SLT   1 -0.15901532  8.515104e-01         FALSE
## 47       SMOD   0 -0.76751431  5.204208e-01         FALSE
## 48       SMOD   1 -0.27172729  8.200405e-01         FALSE
## 49        SUB   0 -0.49804663  5.754823e-01         FALSE
## 50        SUB   1 -0.38779647  6.612229e-01         FALSE
## 51        XOR   0  0.03085968  9.702871e-01         FALSE
## 52        XOR   1 -0.83092885  3.211598e-01         FALSE

The blue graph is the measurements, the red line is the estimated arguments cost shifted by the estimated constant cost of the programs execution. The default opcodes count is 30.

proceed_with_opcodes <- first_pass[which(first_pass$has_impacting), "opcode"]
for(opcode in proceed_with_opcodes) {
  if (nrow(measurements[which(measurements$opcode == opcode & measurements$op_count == 30), ]) == 0) {
    stop(paste("measurements for ", opcode, "are invalid, there are no measurements for op_count==30"))
  }
  opcode_estimates <- first_pass[which(first_pass$opcode == opcode),]
  if (opcode_estimates[["has_impacting_arg0"]]) {
    slope = opcode_estimates[["arg0_ns_raw"]] * 30
    intercept = opcode_estimates[["intercept"]] + opcode_estimates[["estimate_marginal_ns"]] * 30
    xmax = max(measurements[which(measurements$opcode == opcode & measurements$op_count == 30), "arg0"])
    ymin = min(measurements[which(measurements$opcode == opcode & measurements$op_count == 30), "measure_total_time_ns"], intercept, intercept + slope * xmax)
    ymax = max(measurements[which(measurements$opcode == opcode & measurements$op_count == 30), "measure_total_time_ns"], intercept, intercept + slope * xmax)
    plot(measure_total_time_ns ~ arg0, data=measurements[which(measurements$opcode == opcode & measurements$op_count == 30), ], pch=5, col='blue', ylim=c(ymin*0.95, ymax*1.05))
    title(main = paste(env, opcode, 'arg0', 'opcount 30'))
    abline(a=intercept, b=slope, col="red")
  }
  if (opcode_estimates[["has_impacting_arg1"]]) {
    slope = opcode_estimates[["arg1_ns_raw"]] * 30
    intercept = opcode_estimates[["intercept"]] + opcode_estimates[["estimate_marginal_ns"]] * 30
    xmax = max(measurements[which(measurements$opcode == opcode & measurements$op_count == 30), "arg1"])
    ymin = min(measurements[which(measurements$opcode == opcode & measurements$op_count == 30), "measure_total_time_ns"], intercept, intercept + slope * xmax)
    ymax = max(measurements[which(measurements$opcode == opcode & measurements$op_count == 30), "measure_total_time_ns"], intercept, intercept + slope * xmax)
    plot(measure_total_time_ns ~ arg1, data=measurements[which(measurements$opcode == opcode & measurements$op_count == 30), ], pch=5, col='blue', ylim=c(ymin*0.95, ymax*1.05))
    title(main = paste(env, opcode, 'arg1', 'opcount 30'))
    abline(a=intercept, b=slope, col="red")
  }
  if (opcode_estimates[["has_impacting_arg2"]]) {
    slope = opcode_estimates[["arg2_ns_raw"]] * 30
    intercept = opcode_estimates[["intercept"]] + opcode_estimates[["estimate_marginal_ns"]] * 30
    xmax = max(measurements[which(measurements$opcode == opcode & measurements$op_count == 30), "arg2"])
    ymin = min(measurements[which(measurements$opcode == opcode & measurements$op_count == 30), "measure_total_time_ns"], intercept, intercept + slope * xmax)
    ymax = max(measurements[which(measurements$opcode == opcode & measurements$op_count == 30), "measure_total_time_ns"], intercept, intercept + slope * xmax)
    plot(measure_total_time_ns ~ arg2, data=measurements[which(measurements$opcode == opcode & measurements$op_count == 30), ], pch=5, col='blue', ylim=c(ymin*0.95, ymax*1.05))
    title(main = paste(env, opcode, 'arg2', 'opcount 30'))
    abline(a=intercept, b=slope, col="red")
  }
}

Expensive and trivial modes

In some cases an execution of opcode is trivial. For instance DIV x, y for x < y fives 0 without further work. For selected opcodes we investigate the trivial mode and so the called expensive mode when typical algorithm needs to be run fully.

first_pass[which(!is.na(first_pass$expensive_ns_raw)), c("opcode", "estimate_marginal_ns", "estimate_marginal_ns_p", "expensive_ns_raw", "expensive_ns_p")]
##    opcode estimate_marginal_ns estimate_marginal_ns_p expensive_ns_raw
## 2  ADDMOD             8.049708              0.7959371       -33.269516
## 5     DIV           -11.378359              0.6467815        -5.666009
## 12    MOD           -23.148542              0.3628147        23.198470
## 14 MULMOD            39.281711              0.1748634       -15.479091
## 18   SDIV            -8.774008              0.7196046         8.607449
## 24   SMOD            14.347613              0.5518920        -4.558836
##    expensive_ns_p
## 2       0.1955791
## 5       0.8362751
## 12      0.4066609
## 14      0.5642700
## 18      0.7577850
## 24      0.8657557

Heat maps help to visualize dependency between two arguments, in particular division into trivial and expensive modes. Other strong or weak dependencies can be also detected.

for(opcode in proceed_with_opcodes) {
  working_args <- args_estimates[which(args_estimates$opcode == opcode), "arg"]
  if (length(working_args) == 1) {
    plot_args_dependency_1(measurements, opcode, working_args[[1]])
  } else if (length(working_args) == 2) {
    plot_args_dependency_2(measurements, opcode, working_args)
  }
}

Verification against marginal estimations

In this section we compare opcodes cost obtained in the arguments course with previous estimations obtained in the marginal procedure.

No marginal estimated cost files given.

if (params$output_estimated_cost != "") {
  write.csv(first_pass, params$output_estimated_cost, quote=FALSE, row.names=FALSE)
}